In [1]:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from datetime import timedelta
from matplotlib.ticker import MaxNLocator
%matplotlib inline

In [2]:
# plotting parameters
figure_size = (6.25984252, 4.1456693) # = 15.9, 10.53 cm

def rgb(rgb_255_tuple):
    return tuple(v/255 for v in rgb_255_tuple)

colour_op = rgb((13, 103, 53))
colour_s = rgb((254, 205, 8))
colour_p = rgb((237, 29, 36))

sns.set_palette('muted')

Raw data processing

Inflow profile calculation


In [3]:
df = pd.read_csv('CS2 data 1min_2017-09-26.pdi.gz')

# pivot to one column per tag; timestamps are shifted back by one interval
time_interval = pd.Timedelta(minutes=1)
df = df.pivot_table(values='Value', columns='TagName',
                    index=(pd.to_datetime(df['Date'].str.cat(df['Time'], sep=' ')) - time_interval))
df.index.name = 'DateTime'

df['Time_30'] = df.index.floor('30min').time

df['EskomDayOfWeek'] = df.index.dayofweek + 1
# replace holidays with Eskom days
h_path = r'..\holidays.csv'
holidays = np.genfromtxt(h_path, dtype='datetime64[D]', delimiter=',', usecols=0)
holidays = list(holidays)
holidays_numbers = np.genfromtxt(h_path, dtype=int, delimiter=',', usecols=1)
holidays_numbers = list(holidays_numbers)
for h_n, h_d in zip(holidays_numbers, holidays):
    df.loc[df.index.date==h_d,'EskomDayOfWeek'] = h_n
# drop weekends
df = df[df['EskomDayOfWeek'] <= 5]
df.drop('EskomDayOfWeek', axis=1)  # note: result not assigned, so the column is retained (see output below)

df.head(5)


Out[3]:
TagName KDCE_S10_12PMP00_00ILT#502.FA_PV KDCE_S10_12PMP00_00ILT#503.FA_PV KDCE_S10_12PMP00_01ILT#503.FA_PV KDCE_S10_12PMP02_Machine.FA_RF KDCE_S10_12PMP03_Machine.FA_RF KDCE_S10_12PMP04_Machine.FA_RF KDCE_S10_12PMP06_Machine.FA_RF KDCE_S10_12PMP07_Machine.FA_RF KDCE_S10_27PMP00_00ILT#504.FA_PV KDCE_S10_27PMP00_00ILT#505.FA_PV KDCE_S10_27PMP00_01ILT#505.FA_PV KDCE_S10_27PMP02_Machine.FA_RF KDCE_S10_27PMP03_Machine.FA_RF KDCE_S10_27PMP04_Machine.FA_RF KDCE_S10_27PMP05_Machine.FA_RF Time_30 EskomDayOfWeek
DateTime
2016-09-01 00:00:00 52.967242 40.914352 42.153806 1.0 0.0 1.0 0.0 1.0 43.670489 49.681713 64.149307 0.0 1.0 1.0 NaN 00:00:00 4
2016-09-01 00:01:00 53.067131 40.914352 42.174387 1.0 0.0 1.0 0.0 1.0 43.621620 49.681713 64.149307 0.0 1.0 1.0 NaN 00:00:00 4
2016-09-01 00:02:00 53.067131 40.914352 42.221463 1.0 0.0 1.0 0.0 1.0 43.732320 49.681713 64.149307 0.0 1.0 1.0 NaN 00:00:00 4
2016-09-01 00:03:00 53.210744 40.914352 42.239582 1.0 0.0 1.0 0.0 1.0 43.664848 49.681713 64.149307 0.0 1.0 1.0 NaN 00:00:00 4
2016-09-01 00:04:00 53.211807 40.914352 42.265280 1.0 0.0 1.0 0.0 1.0 43.689365 49.681713 64.149307 0.0 1.0 1.0 NaN 00:00:00 4

In [4]:
#df.drop(['KDCE_S10_12PMP00_01ILT#502.FA_PV', 'KDCE_S10_27PMP00_01ILT#504.FA_PV'], axis=1, inplace=True) #broken dam sensor

df = df.rename(columns = {'KDCE_S10_12PMP00_00ILT#502.FA_PV': '12L D1'})
df['12L D2'] = df[['KDCE_S10_12PMP00_00ILT#503.FA_PV', 'KDCE_S10_12PMP00_01ILT#503.FA_PV']].mean(axis=1)

df = df.rename(columns = {'KDCE_S10_27PMP00_00ILT#504.FA_PV': '27L D1'})
df['27L D2'] = df[['KDCE_S10_27PMP00_00ILT#505.FA_PV', 'KDCE_S10_27PMP00_01ILT#505.FA_PV']].mean(axis=1)

df.drop(['KDCE_S10_12PMP00_00ILT#503.FA_PV', 'KDCE_S10_12PMP00_01ILT#503.FA_PV',
         'KDCE_S10_27PMP00_00ILT#505.FA_PV', 'KDCE_S10_27PMP00_01ILT#505.FA_PV'], axis=1, inplace=True)

df['12L Level'] = df[['12L D1', '12L D2']].mean(axis=1)
df['27L Level'] = df[['27L D1', '27L D2']].mean(axis=1)

df.drop(['12L D1', '12L D2', '27L D1', '27L D2'], axis=1, inplace=True)

df.head(5)


Out[4]:
TagName KDCE_S10_12PMP02_Machine.FA_RF KDCE_S10_12PMP03_Machine.FA_RF KDCE_S10_12PMP04_Machine.FA_RF KDCE_S10_12PMP06_Machine.FA_RF KDCE_S10_12PMP07_Machine.FA_RF KDCE_S10_27PMP02_Machine.FA_RF KDCE_S10_27PMP03_Machine.FA_RF KDCE_S10_27PMP04_Machine.FA_RF KDCE_S10_27PMP05_Machine.FA_RF Time_30 EskomDayOfWeek 12L Level 27L Level
DateTime
2016-09-01 00:00:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.250660 50.293000
2016-09-01 00:01:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.305750 50.268565
2016-09-01 00:02:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.317519 50.323915
2016-09-01 00:03:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.393856 50.290179
2016-09-01 00:04:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.400812 50.302437

In [5]:
status_col_dict = {}
for c in df.columns:
    if '_Machine.FA_RF' in c: # pump status
        item = c.split('_')[2].split('PMP')
        level = item[0] + 'L'
        pump_num = 'P' + item[1][1]
        status_col_dict[c] = level + ' ' + pump_num
        
df = df.rename(columns = status_col_dict)
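
For example, here is how the parsing above maps one raw tag name to its short pump label (a standalone sketch of the same split logic):

tag = 'KDCE_S10_12PMP02_Machine.FA_RF'
item = tag.split('_')[2].split('PMP')   # ['12', '02'] → level and pump number
print(item[0] + 'L', 'P' + item[1][1])  # prints: 12L P2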

In [6]:
df.head(5)


Out[6]:
TagName 12L P2 12L P3 12L P4 12L P6 12L P7 27L P2 27L P3 27L P4 27L P5 Time_30 EskomDayOfWeek 12L Level 27L Level
DateTime
2016-09-01 00:00:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.250660 50.293000
2016-09-01 00:01:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.305750 50.268565
2016-09-01 00:02:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.317519 50.323915
2016-09-01 00:03:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.393856 50.290179
2016-09-01 00:04:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.400812 50.302437

In [7]:
df = df.rename(columns = {'12L P2': '12L P1',
                          '12L P3': '12L P2',
                          '12L P4': '12L P3',
                          '12L P6': '12L P4',
                          '12L P7': '12L P5',
                          '27L P2': '27L P1',
                          '27L P3': '27L P2',
                          '27L P4': '27L P3',
                          '27L P5': '27L P4'})
df.head(5)


Out[7]:
TagName 12L P1 12L P2 12L P3 12L P4 12L P5 27L P1 27L P2 27L P3 27L P4 Time_30 EskomDayOfWeek 12L Level 27L Level
DateTime
2016-09-01 00:00:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.250660 50.293000
2016-09-01 00:01:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.305750 50.268565
2016-09-01 00:02:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.317519 50.323915
2016-09-01 00:03:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.393856 50.290179
2016-09-01 00:04:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.400812 50.302437

Process selected data


In [8]:
def calculate_inflows(level_name, pump_flowrates, capacity, feeding_level_name=None):
    """Back-calculate pumped outflow and dam inflow for a level from the module-level df.

    feeding_level_name is the level whose pumps discharge into this level.
    """
    calc_pump_flows = [np.nan]
    calc_inflows = [np.nan]

    for i in range(1, len(df.index)):
        pump_status = {}
        for j,_ in enumerate(pump_flowrates):
            pump_str = 'P' + str(j+1)
            pump_status[pump_str] = df[level_name + ' ' + pump_str].values[i]
        
        l_new = df[level_name + ' Level'].values[i]
        l_old = df[level_name + ' Level'].values[i-1]
        pump_flow_from_prev_lev = 0 if feeding_level_name is None else df[feeding_level_name + ' PumpFlow'].values[i]
        
        # NaNs in any of the inputs simply propagate through the arithmetic below
        delta_level = l_new - l_old
        if delta_level != 0:
            ans_outflow_pumps = 0
            for status, flowrate in zip(pump_status.values(), pump_flowrates):
                ans_outflow_pumps += status * flowrate

            # level change (% of capacity) over the one-minute step, converted to an average flow,
            # plus pumped outflow, minus the water pumped in from the feeding level
            ans_inflow = (delta_level / 100 * capacity) / 60 + ans_outflow_pumps - pump_flow_from_prev_lev
        else:
            ans_outflow_pumps = np.nan
            ans_inflow = np.nan

        calc_pump_flows.append(ans_outflow_pumps)
        calc_inflows.append(ans_inflow)

    df[level_name + ' PumpFlow'] = calc_pump_flows
    df[level_name + ' Inflow'] = calc_inflows
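
In words, the loop performs a one-minute mass balance over each dam; the ans_inflow line above is

$Q_{\text{in}} = \dfrac{\Delta L}{100}\cdot\dfrac{C}{60} + Q_{\text{pumps}} - Q_{\text{feed}}$

where $\Delta L$ is the level change in % over the minute, $C$ the dam capacity, $Q_{\text{pumps}}$ the outflow of this level's running pumps, and $Q_{\text{feed}}$ the flow pumped in from the feeding level (all flows in the units of pump_flowrates).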

In [9]:
calculate_inflows('27L', [236.1]*4, 3000000)
calculate_inflows('12L', [194.6]*5, 3000000, '27L')

In [10]:
df.head(5)


Out[10]:
TagName 12L P1 12L P2 12L P3 12L P4 12L P5 27L P1 27L P2 27L P3 27L P4 Time_30 EskomDayOfWeek 12L Level 27L Level 27L PumpFlow 27L Inflow 12L PumpFlow 12L Inflow
DateTime
2016-09-01 00:00:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.250660 50.293000 NaN NaN NaN NaN
2016-09-01 00:01:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.305750 50.268565 NaN NaN 583.8 NaN
2016-09-01 00:02:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.317519 50.323915 NaN NaN 583.8 NaN
2016-09-01 00:03:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.393856 50.290179 NaN NaN 583.8 NaN
2016-09-01 00:04:00 1.0 0.0 1.0 0.0 1.0 0.0 1.0 1.0 NaN 00:00:00 4 47.400812 50.302437 NaN NaN 583.8 NaN
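
As a quick sanity check against the table above (a sketch using the 194.6 flow rate passed to calculate_inflows): at 00:01 three of the five 12L pumps are running, so the 12L PumpFlow of 583.8 is simply 3 × 194.6.

running = df[['12L P1', '12L P2', '12L P3', '12L P4', '12L P5']].iloc[1].sum()
print(running * 194.6, df['12L PumpFlow'].iloc[1])  # both ≈ 583.8 (three pumps × 194.6)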

Export data for use in simulation

Inflow profile


In [11]:
df2 = df.copy()

date_start = '2016-10-25'
date_end = '2016-10-26'

df2 = df2[(df2.index>=date_start) & (df2.index<date_end)]

inflow_profiles = []
overall_day_completeness_count = 0
overall_day_completeness_max = 0
overall_completeness_count = 0
overall_completeness_max = 0
for l in ['27L', '12L']:
    grouped = df2.set_index('Time_30')[l + ' Inflow'].groupby('Time_30')
    
    grouped_mean = grouped.mean()
    inflow_profiles.append(grouped_mean)
    
    # first %: share of the 48 × 30 one-minute samples present; second %: share of the 48 half-hourly means that are non-NaN
    print('{} completeness: {:6.2f}% → {:6.2f}%'.format(l, grouped.count().sum()/48/30*100, grouped_mean.count()/48*100))
    overall_day_completeness_count += grouped.count().sum()
    overall_day_completeness_max += 48*30
    overall_completeness_count += grouped_mean.count()
    overall_completeness_max += 48
print('                   ------   -------')
print('All completeness: {:6.2f}% → {:6.2f}%'.format(overall_day_completeness_count/overall_day_completeness_max*100, overall_completeness_count/overall_completeness_max*100))
    
pd.DataFrame(inflow_profiles).T.to_csv(r'..\..\simulations\Case_study_2\input\CS2_dam_inflow_profiles.csv.gz', compression='gzip')


27L completeness: 100.00% → 100.00%
12L completeness: 100.00% → 100.00%
                   ------   -------
All completeness: 100.00% → 100.00%

Data for validation of the simulation


In [12]:
df_real = pd.read_csv('CS2 data 1s 10-25_2017-09-26_pivot.csv.gz')
df_real = df_real.set_index('DateTime')
df_real.index = pd.to_datetime(df_real.index)
df_real.head(5)


Out[12]:
KDCE_S10_12PMP00_00ILT#502.FA_PV KDCE_S10_12PMP00_00ILT#503.FA_PV KDCE_S10_12PMP00_01ILT#503.FA_PV KDCE_S10_12PMP02_Machine.FA_RF KDCE_S10_12PMP03_Machine.FA_RF KDCE_S10_12PMP04_Machine.FA_RF KDCE_S10_12PMP06_Machine.FA_RF KDCE_S10_12PMP07_Machine.FA_RF KDCE_S10_27PMP00_00ILT#504.FA_PV KDCE_S10_27PMP00_00ILT#505.FA_PV KDCE_S10_27PMP00_01ILT#505.FA_PV KDCE_S10_27PMP02_Machine.FA_RF KDCE_S10_27PMP03_Machine.FA_RF KDCE_S10_27PMP04_Machine.FA_RF KDCE_S10_27PMP05_Machine.FA_RF
DateTime
2016-10-25 00:00:00 47.222221 47.569447 31.944445 1.0 0.0 1.0 0.0 1.0 55.989582 64.641205 74.479172 0.0 1.0 0.0 1.0
2016-10-25 00:00:01 47.222221 47.569447 31.944445 1.0 0.0 1.0 0.0 1.0 55.989582 64.641205 74.479172 0.0 1.0 0.0 1.0
2016-10-25 00:00:02 47.222221 47.569447 31.944445 1.0 0.0 1.0 0.0 1.0 55.989582 64.641205 74.479172 0.0 1.0 0.0 1.0
2016-10-25 00:00:03 47.222221 47.569447 31.944445 1.0 0.0 1.0 0.0 1.0 55.989582 64.641205 74.479172 0.0 1.0 0.0 1.0
2016-10-25 00:00:04 47.222221 47.569447 31.944445 1.0 0.0 1.0 0.0 1.0 55.989582 64.641205 74.479172 0.0 1.0 0.0 1.0

In [13]:
df_real['12L Status'] = df_real[[col for col in df_real.columns if ('12PMP' in col and 'Machine.FA_RF' in col)]].sum(axis=1)
df_real['27L Status'] = df_real[[col for col in df_real.columns if ('27PMP' in col and 'Machine.FA_RF' in col)]].sum(axis=1)
df_real.drop([col for col in df_real.columns if 'Machine.FA_RF' in col], axis=1, inplace=True)

#df_real.drop(['KDCE_S10_12PMP00_01ILT#502.FA_PV', 'KDCE_S10_27PMP00_01ILT#504.FA_PV'], axis=1, inplace=True) #broken dam sensor
df_real = df_real.rename(columns = {'KDCE_S10_12PMP00_00ILT#502.FA_PV': '12L D1'})
df_real['12L D2'] = df_real[['KDCE_S10_12PMP00_00ILT#503.FA_PV', 'KDCE_S10_12PMP00_01ILT#503.FA_PV']].mean(axis=1)

df_real = df_real.rename(columns = {'KDCE_S10_27PMP00_00ILT#504.FA_PV': '27L D1'})
df_real['27L D2'] = df_real[['KDCE_S10_27PMP00_00ILT#505.FA_PV', 'KDCE_S10_27PMP00_01ILT#505.FA_PV']].mean(axis=1)

df_real.drop(['KDCE_S10_12PMP00_00ILT#503.FA_PV', 'KDCE_S10_12PMP00_01ILT#503.FA_PV',
         'KDCE_S10_27PMP00_00ILT#505.FA_PV', 'KDCE_S10_27PMP00_01ILT#505.FA_PV'], axis=1, inplace=True)


# each level has two dams of equal capacity (the 3*1000000 factor), so the combined level is the mean of the two dam levels
df_real['12L Total water'] = 3*1000000/100 * df_real[['12L D1', '12L D2']].sum(axis=1)
df_real['27L Total water'] = 3*1000000/100 * df_real[['27L D1', '27L D2']].sum(axis=1)
df_real['12L Level'] = df_real['12L Total water']/(2*3*1000000) * 100
df_real['27L Level'] = df_real['27L Total water']/(2*3*1000000) * 100

df_real.dropna(axis=1, how='any', inplace=True)
df_real.head(5)


Out[13]:
12L D1 27L D1 12L Status 27L Status 12L D2 27L D2 12L Total water 27L Total water 12L Level 27L Level
DateTime
2016-10-25 00:00:00 47.222221 55.989582 3.0 2.0 39.756946 69.560188 2.609375e+06 3.766493e+06 43.489583 62.774885
2016-10-25 00:00:01 47.222221 55.989582 3.0 2.0 39.756946 69.560188 2.609375e+06 3.766493e+06 43.489583 62.774885
2016-10-25 00:00:02 47.222221 55.989582 3.0 2.0 39.756946 69.560188 2.609375e+06 3.766493e+06 43.489583 62.774885
2016-10-25 00:00:03 47.222221 55.989582 3.0 2.0 39.756946 69.560188 2.609375e+06 3.766493e+06 43.489583 62.774885
2016-10-25 00:00:04 47.222221 55.989582 3.0 2.0 39.756946 69.560188 2.609375e+06 3.766493e+06 43.489583 62.774885
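
Because both dams on a level are given the same capacity above, the combined level reduces to the plain mean of the two dam levels; a quick consistency check (sketch):

assert np.allclose(df_real['12L Level'], df_real[['12L D1', '12L D2']].mean(axis=1))
assert np.allclose(df_real['27L Level'], df_real[['27L D1', '27L D2']].mean(axis=1))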

In [14]:
date_range_start = '2016-10-25'
date_range_end = '2016-10-26'
df_real2 = df_real.loc[date_range_start:date_range_end]
df_real2[['12L Level','27L Level']].plot()
df_real2[['12L Status','27L Status']].plot()
plt.show()
plt.close('all')



Save simulation testing data


In [15]:
df_real2[['12L Status', '27L Status', '12L Level', '27L Level']].to_csv(r'..\..\simulations\Case_study_2\input\CS2_data_for_validation.csv.gz', compression='gzip')

Now perform the simulation in validation mode

If the simulation outputs closely match the real data, the simulation is considered valid; the match is quantified below with R² and RMSE

Run the simulation in validation_mode, using the recorded initial dam levels and following the recorded pump statuses instead of the scheduler


In [16]:
df_sim = pd.read_csv(r'..\..\simulations\Case_study_2\output\CS2_simulation_data_export_validation.csv.gz')
df_sim['time_clean'] = pd.to_datetime(df_sim['seconds'], unit='s') + timedelta(days=17099)  # 17099 days after 1970-01-01 is 2016-10-25
df_sim.head(5)


Out[16]:
seconds 27L Level 27L Status 12L Level 12L Status Eskom ToU Pump system total power time_clean
0 0 62.774885 2.0 43.489583 3.0 3 13821.0 2016-10-25 00:00:00
1 1 62.775565 2.0 43.487235 3.0 3 13821.0 2016-10-25 00:00:01
2 2 62.776246 2.0 43.484887 3.0 3 13821.0 2016-10-25 00:00:02
3 3 62.776926 2.0 43.482539 3.0 3 13821.0 2016-10-25 00:00:03
4 4 62.777606 2.0 43.480191 3.0 3 13821.0 2016-10-25 00:00:04

In [17]:
def rmse(real, sim):
    return np.sqrt(((real - sim) ** 2).mean())

def r2(x, y):
    return stats.pearsonr(x,y)[0] ** 2
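
The four comparisons below are done one series at a time; the same metrics can also be tabulated in one go (a sketch, assuming df_real2 and df_sim are aligned sample-for-sample, as the cells below already rely on):

compared = ['27L Status', '27L Level', '12L Status', '12L Level']
metrics = pd.DataFrame({c: {'R squared': r2(df_real2[c].values, df_sim[c].values),
                            'RMSE': rmse(df_real2[c].values, df_sim[c].values)}
                        for c in compared})
print(metrics)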

Comparison of simulation and actual data

27L pump status


In [18]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2['27L Status'].values
y2 = df_sim['27L Status'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  1.0
RMSE =  0.0

In [19]:
fig, ax1 = plt.subplots(figsize=(figure_size[0], figure_size[1]/1.5), dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2['27L Status'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim['27L Status'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Number of pumps running')
ax1.set_ylim([0, 4])
ax1.yaxis.set_major_locator(MaxNLocator(integer=True))

ax1.axvspan('2016-10-25 00:00:00','2016-10-25 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-10-25 06:00:00','2016-10-25 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-10-25 07:00:00','2016-10-25 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-10-25 10:00:00','2016-10-25 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 18:00:00','2016-10-25 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-10-25 20:00:00','2016-10-25 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 22:00:00','2016-10-25 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS2_validation_27l_status.png', bbox_inches='tight')
plt.close('all')


27L dam level


In [20]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2['27L Level'].values
y2 = df_sim['27L Level'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  0.995293454095
RMSE =  0.689423123644

In [21]:
fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2['27L Level'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim['27L Level'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])

ax1.axvspan('2016-10-25 00:00:00','2016-10-25 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-10-25 06:00:00','2016-10-25 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-10-25 07:00:00','2016-10-25 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-10-25 10:00:00','2016-10-25 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 18:00:00','2016-10-25 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-10-25 20:00:00','2016-10-25 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 22:00:00','2016-10-25 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS2_validation_27l_level.png', bbox_inches='tight')
plt.close('all')


12L pump status


In [22]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2['12L Status'].values
y2 = df_sim['12L Status'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  1.0
RMSE =  0.0

In [23]:
fig, ax1 = plt.subplots(figsize=(figure_size[0], figure_size[1]/1.5), dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2['12L Status'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim['12L Status'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Number of pumps running')
ax1.set_ylim([0, 4])
ax1.yaxis.set_major_locator(MaxNLocator(integer=True))

ax1.axvspan('2016-10-25 00:00:00','2016-10-25 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-10-25 06:00:00','2016-10-25 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-10-25 07:00:00','2016-10-25 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-10-25 10:00:00','2016-10-25 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 18:00:00','2016-10-25 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-10-25 20:00:00','2016-10-25 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 22:00:00','2016-10-25 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS2_validation_12l_status.png', bbox_inches='tight')
plt.close('all')


12L dam level


In [24]:
x = pd.to_datetime(df_real2.index.values).time
y1 = df_real2['12L Level'].values
y2 = df_sim['12L Level'].values

print('R squared = ', r2(y1, y2))
print('RMSE = ', rmse(y1, y2))


R squared =  0.996963541361
RMSE =  0.71226412034

In [25]:
fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

x = df_sim['time_clean']

ax1.plot(x, df_real2['12L Level'], marker=None, label="Actual data", zorder=10)
ax1.plot(x, df_sim['12L Level'], marker=None, label="Simulation data", zorder=11, color=sns.color_palette('muted')[2])

ax1.set_xlabel('Time of day')
ax1.set_ylabel('Dam level (%)')
ax1.set_ylim([0, 100])

ax1.axvspan('2016-10-25 00:00:00','2016-10-25 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('2016-10-25 06:00:00','2016-10-25 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('2016-10-25 07:00:00','2016-10-25 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('2016-10-25 10:00:00','2016-10-25 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 18:00:00','2016-10-25 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('2016-10-25 20:00:00','2016-10-25 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('2016-10-25 22:00:00','2016-10-25 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)

handles, labels = ax1.get_legend_handles_labels()
order = [2, 0, 3, 1, 4]
ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS2_validation_12l_level.png', bbox_inches='tight')
plt.close('all')


Processing of x-factored simulation data


In [26]:
def tou_shade(ax, date):
    y_m_d = '{}-{:02}-{:02}'.format(date.year, date.month, date.day)
    ax.axvspan(y_m_d + ' 00:00:00', y_m_d + ' 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
    ax.axvspan(y_m_d + ' 06:00:00', y_m_d + ' 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
    ax.axvspan(y_m_d + ' 07:00:00', y_m_d + ' 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
    ax.axvspan(y_m_d + ' 10:00:00', y_m_d + ' 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 18:00:00', y_m_d + ' 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 20:00:00', y_m_d + ' 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
    ax.axvspan(y_m_d + ' 22:00:00', y_m_d + ' 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

def sim_plot(case_study, factor, mode, level_name, level_limits, x, y, y_lim=100, save_fig=False):
    """Plot dam level + pump status ('level' mode) or power ('power'/'power_raw' modes)
    for the simulation results currently loaded into the module-level df."""
    fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)

    if mode != 'power' and mode != 'power_raw':
        for l in level_limits:
            ax1.plot([x[0], x[len(x)-1]], [l, l], ls=':', color=sns.color_palette('muted')[0], label='Dam level limits')

        lines1 = ax1.plot(x, df[level_name + ' Level'])

        ax2 = ax1.twinx()
        lines2 = ax2.plot(x, df[level_name + ' Status'], color=sns.color_palette('muted')[2])
        
        tou_shade(ax2, x[0])

        ax1.set_ylabel('Dam level (%)')
        ax1.set_ylim([0, y_lim])
        ax2.set_ylabel('Number of pumps running')
        ax2.yaxis.set_major_locator(MaxNLocator(integer=True))
        
        ax1.set_zorder(ax2.get_zorder()+1)
        ax1.patch.set_visible(False)

        handles, labels = ax1.get_legend_handles_labels()
        handles2, labels2 = ax2.get_legend_handles_labels()
        del handles[1]
        del labels[1]
        handles += handles2
        labels += labels2
        order = [3, 1, 4, 0, 5, 2]
        ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)

        ax1.yaxis.label.set_color(sns.color_palette('muted')[0])
        ax2.yaxis.label.set_color(sns.color_palette('muted')[2])
    
    else:
        lbl = 'Power (resampled, step plot)' if mode == 'power' else 'Power (raw data, 1 second)'
        ax1.plot(x, y, drawstyle='steps-post', label=lbl, zorder=3)
        tou_shade(ax1, x[0])
        ax1.set_ylabel('Power (kW)')
        if y_lim is not None:
            ax1.set_ylim([0, y_lim])
        
        handles, labels = ax1.get_legend_handles_labels()
        order = [1, 0, 2, 3]
        ax1.legend([handles[idx] for idx in order],[labels[idx] for idx in order], bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode='expand', borderaxespad=0.)
    
    ax1.set_xlabel('Time of day')
    if mode != 'power' and mode != 'power_raw':
        ax1.yaxis.set_ticks(np.arange(0, 101, 10))
    ax1.xaxis.set_major_locator(mdates.HourLocator())
    ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    fig.autofmt_xdate(rotation=90)
    ax1.set_xlim((min(x), max(x)))

    ax1.grid(which='major', alpha=0.5)
    
    fig.tight_layout()
    
    if save_fig:
        if mode == 'power_raw':
            chart_type = 'power_raw'
        elif mode == 'power':
            chart_type = 'power_resampled'
        else:
            chart_type = 'dam_level_and_status'
                
        filename = 'output/CS{}_{}_{}_{}.png'.format(case_study, level_name, factor, chart_type)
        fig.savefig(filename, bbox_inches='tight')
    
    return fig

1-factor


In [27]:
df = pd.read_csv(r'..\..\simulations\Case_study_2\output\CS2_simulation_data_export_1-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df.head(5)


Out[27]:
seconds 27L Level 27L Status 12L Level 12L Status Eskom ToU Pump system total power time_clean
0 0 62.774885 2.0 43.489583 3.0 3 13821.0 1970-01-01 00:00:00
1 1 62.775565 2.0 43.487235 3.0 3 13821.0 1970-01-01 00:00:01
2 2 62.776246 2.0 43.484887 3.0 3 13821.0 1970-01-01 00:00:02
3 3 62.776926 2.0 43.482539 3.0 3 13821.0 1970-01-01 00:00:03
4 4 62.777606 2.0 43.480191 3.0 3 13821.0 1970-01-01 00:00:04

In [28]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}%  Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))


27L Min: 24.985448049954787%  Max: 73.56603136848814%
12L Min: 24.99436643003447%  Max: 80.00348510880113%

Plot dam level and status


In [29]:
fig = sim_plot(2, 1, 'level', '27L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)



In [30]:
fig = sim_plot(2, 1, 'level', '12L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)


Plot power

Raw data


In [31]:
fig = sim_plot(2, 1, 'power_raw', None, None, df['time_clean'], df['Pump system total power'], y_lim=None, save_fig=True)


Resampled data


In [32]:
resampled = pd.DataFrame()
resampled["total_30_minute"] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS2_resampled_power_1_factor.csv')

fig, ax1 = plt.subplots(figsize=figure_size, dpi=1200)
#fig.patch.set_facecolor('grey')

x = resampled.index
y = resampled['total_30_minute']

ax1.plot(x,y, 'x', label="Power (data points resampled to 30 minutes)", zorder=4)
ax1.plot(x,y, '--', label="Power (resampled, line plot)", zorder=2)
ax1.plot(x,y, drawstyle='steps-post', label="Power (resampled, step plot)", zorder=3)
ax1.set_xlabel('Time of day')
ax1.set_ylabel('Power consumption (kW)')
#ax1.set_ylim([0, 4000])

ax1.axvspan('1970-01-01 00:00:00','1970-01-01 05:59:59', alpha=.25, color=colour_op, lw=0, zorder=0, label='Off-peak')
ax1.axvspan('1970-01-01 06:00:00','1970-01-01 06:59:59', alpha=.25, color=colour_s, lw=0, zorder=0, label='Standard')
ax1.axvspan('1970-01-01 07:00:00','1970-01-01 09:59:59', alpha=.25, color=colour_p, lw=0, zorder=0, label='Peak')
ax1.axvspan('1970-01-01 10:00:00','1970-01-01 17:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 18:00:00','1970-01-01 19:59:59', alpha=.25, color=colour_p, lw=0, zorder=0)
ax1.axvspan('1970-01-01 20:00:00','1970-01-01 21:59:59', alpha=.25, color=colour_s, lw=0, zorder=0)
ax1.axvspan('1970-01-01 22:00:00','1970-01-01 23:59:59', alpha=.25, color=colour_op, lw=0, zorder=0)

ax1.xaxis.set_major_locator(mdates.HourLocator())
ax1.xaxis.set_minor_locator(mdates.MinuteLocator(byminute=30))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
fig.autofmt_xdate(rotation=90)
ax1.set_xlim((min(x),max(x)))

ax1.grid(which='major', alpha=0.5)
#ax1.grid(which='minor', alpha=0.25)

plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)

fig.tight_layout()
plt.show()
fig.savefig('output/CS2_1_power_resampled_all.png', bbox_inches='tight')
plt.close('all')



In [33]:
fig = sim_plot(2, 1, 'power', None, None, resampled.index, resampled['total_30_minute'], y_lim=None, save_fig=True)


2-factor


In [34]:
df = pd.read_csv(r'..\..\simulations\Case_study_2\output\CS2_simulation_data_export_2-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df.head(5)


Out[34]:
seconds 27L Level 27L Status 12L Level 12L Status Eskom ToU Pump system total power time_clean
0 0 62.774885 2.0 43.489583 3.0 3 13821.0 1970-01-01 00:00:00
1 1 62.775565 2.0 43.487235 3.0 3 13821.0 1970-01-01 00:00:01
2 2 62.776246 2.0 43.484887 3.0 3 13821.0 1970-01-01 00:00:02
3 3 62.776926 2.0 43.482539 3.0 3 13821.0 1970-01-01 00:00:03
4 4 62.777606 2.0 43.480191 3.0 3 13821.0 1970-01-01 00:00:04

In [35]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}%  Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))


27L Min: 24.985448049954787%  Max: 73.56603136848814%
12L Min: 24.99436643003447%  Max: 80.00348510880113%

Plot dam level and status


In [36]:
fig = sim_plot(2, 2, 'level', '27L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)



In [37]:
fig = sim_plot(2, 2, 'level', '12L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)


Plot power


In [38]:
resampled = pd.DataFrame()
resampled["total_30_minute"] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS2_resampled_power_2_factor.csv')

fig = sim_plot(2, 2, 'power', None, None, resampled.index, resampled['total_30_minute'], y_lim=None, save_fig=True)


n-factor


In [39]:
df = pd.read_csv(r'..\..\simulations\Case_study_2\output\CS2_simulation_data_export_n-factor.csv.gz')
df['time_clean'] = pd.to_datetime(df['seconds'], unit='s')
df.head(5)


Out[39]:
seconds 27L Level 27L Status 12L Level 12L Status Eskom ToU Pump system total power time_clean
0 0 62.774885 2.0 43.489583 3.0 3 13821.0 1970-01-01 00:00:00
1 1 62.775565 2.0 43.487235 3.0 3 13821.0 1970-01-01 00:00:01
2 2 62.776246 2.0 43.484887 3.0 3 13821.0 1970-01-01 00:00:02
3 3 62.776926 2.0 43.482539 3.0 3 13821.0 1970-01-01 00:00:03
4 4 62.777606 2.0 43.480191 3.0 3 13821.0 1970-01-01 00:00:04

In [40]:
for c in df.columns:
    if 'Level' in c:
        print('{} Min: {}%  Max: {}%'.format(c.replace(' Level', ''), df[c].min(), df[c].max()))


27L Min: 29.994697111926893%  Max: 73.56603136848814%
12L Min: 31.13030737900091%  Max: 80.00198402436575%

Plot dam level and status


In [41]:
fig = sim_plot(2, 'n', 'level', '27L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)



In [42]:
fig = sim_plot(2, 'n', 'level', '12L', [25, 85], df['time_clean'], None, y_lim=100, save_fig=True)


Plot power


In [43]:
resampled = pd.DataFrame()
resampled["total_30_minute"] = df.set_index('time_clean')['Pump system total power'].resample('30T').mean()
resampled.to_csv('output/CS2_resampled_power_n_factor.csv')

fig = sim_plot(2, 'n', 'power', None, None, resampled.index, resampled['total_30_minute'], y_lim=None, save_fig=True)


Scoring graph


In [44]:
scores = pd.read_csv('scoring_results.csv').set_index('Score')
scores


Out[44]:
1-factor 2-factor n-factor
Score
Feature 16.666667 20.370370 33.333333
Performance 36.712851 36.712851 63.348608

In [45]:
sns.set()

score_sim = (scores['1-factor']['Performance'], scores['2-factor']['Performance'], scores['n-factor']['Performance'])
score_feat = (scores['1-factor']['Feature'], scores['2-factor']['Feature'], scores['n-factor']['Feature'])
ind = np.arange(len(score_sim))    # the x locations for the groups
width = 0.5       # the width of the bars: can also be len(x) sequence

plt.figure(figsize=figure_size)

p1 = plt.bar(ind, score_sim, width)
p2 = plt.bar(ind, score_feat, width, bottom=score_sim)


plt.ylabel('Scores')
plt.xlabel('Control system')
plt.xticks(ind, ('1-factor', '2-factor', r'$n$-factor'))
#plt.yticks(np.arange(0, 101, 10))
plt.gca().yaxis.set_major_locator(plt.NullLocator())
#plt.legend((p1[0], p2[0]), ('Performance score', 'Feature score'), loc='best')
plt.legend((p1[0], p2[0]), ('Performance score', 'Feature score'), bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)

for r1, r2 in zip(p1, p2):
    h1 = r1.get_height()
    h2 = r2.get_height()
    plt.text(r1.get_x() + r1.get_width() / 2., h1 / 2., "%.2f" % h1, ha="center", va="center", color='white')
    plt.text(r2.get_x() + r2.get_width() / 2., h1 + h2 / 2., "%.2f" % h2, ha="center", va="center", color='white')
    plt.text(r2.get_x() + r2.get_width() / 2., h1 + h2 + 1.5, "%.2f" % (h1 + h2),
         ha="center", va="center", color='black', weight='bold')


plt.grid(False)

plt.tight_layout()

#plt.show()

plt.savefig('output/CS2_final_scores.png', bbox_inches='tight', dpi=1200)
plt.close('all')

In [ ]: